## packages: remove or add your necessary packages

# required_packages <- c("tidyverse", "readxl", "ggthemes", "hrbrthemes", "extrafont", "plotly", "scales", "stringr", "gganimate", "here", "tidytext", "sentimentr", "scales", "DT", "here", "sm", "mblm", "glue", "fs", "knitr", "rmdformats", "janitor", "urltools", "colorspace", "pdftools", "showtext")

required_packages <- c("tidyverse", "extrafont", "here", "colorspace", "pdftools")


for(i in required_packages) { 
  if(!require(i, character.only = T)) {
    
    #  if package is not existing, install then load the package
    install.packages(i, dependencies = T)
    require(i, character.only = T)
  }
}


## save plots?
save <- TRUE
#save <- FALSE

## quality of png's
dpi <- 750

## font adjust; please adjust to client´s website
#extrafont::loadfonts(device = "win", quiet = TRUE)
#font_add_google("Montserrat", "Montserrat")
# font_add_google("Overpass", "Overpass")
# font_add_google("Overpass Mono", "Overpass Mono")



## theme updates; please adjust to client´s website
#theme_set(ggthemes::theme_clean(base_size = 15))
theme_set(ggthemes::theme_clean(base_size = 15, base_family = "FiraSans-Regular"))


theme_update(plot.margin = margin(30, 30, 30, 30),
             plot.background = element_rect(color = "white",
                                            fill = "white"),
             plot.title = element_text(size = 20,
                                       face = "bold",
                                       lineheight = 1.05,
                                       hjust = .5,
                                       margin = margin(10, 0, 25, 0)),
             plot.title.position = "plot",
             plot.caption = element_text(color = "grey40",
                                         size = 9,
                                         margin = margin(20, 0, -20, 0)),
             plot.caption.position = "plot",
             axis.line.x = element_line(color = "black",
                                        size = .8),
             axis.line.y = element_line(color = "black",
                                        size = .8),
             axis.title.x = element_text(size = 16,
                                         face = "bold",
                                         margin = margin(t = 20)),
             axis.title.y = element_text(size = 16,
                                         face = "bold",
                                         margin = margin(r = 20)),
             axis.text = element_text(size = 11,
                                      color = "black",
                                      face = "bold"),
             axis.text.x = element_text(margin = margin(t = 10)),
             axis.text.y = element_text(margin = margin(r = 10)),
             axis.ticks = element_blank(),
             panel.grid.major.x = element_line(size = .6,
                                               color = "#eaeaea",
                                               linetype = "solid"),
             panel.grid.major.y = element_line(size = .6,
                                               color = "#eaeaea",
                                               linetype = "solid"),
             panel.grid.minor.x = element_line(size = .6,
                                               color = "#eaeaea",
                                               linetype = "solid"),
             panel.grid.minor.y = element_blank(),
             panel.spacing.x = unit(4, "lines"),
             panel.spacing.y = unit(2, "lines"),
             legend.position = "top",
             legend.title = element_text(family = "Montserrat",
                                         color = "black",
                                         size = 14,
                                         margin = margin(5, 0, 5, 0)),
             legend.text = element_text(family = "Montserrat",
                                        color = "black",
                                        size = 11,
                                        margin = margin(4.5, 4.5, 4.5, 4.5)),
             legend.background = element_rect(fill = NA,
                                              color = NA),
             legend.key = element_rect(color = NA, fill = NA),
             #legend.key.width = unit(5, "lines"),
             #legend.spacing.x = unit(.05, "pt"),
             #legend.spacing.y = unit(.55, "pt"),
             #legend.margin = margin(0, 0, 10, 0),
             strip.text = element_text(face = "bold",
                                       margin = margin(b = 10)))

## theme settings for flipped plots
theme_flip <-
  theme(panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_line(size = .6,
                                          color = "#eaeaea"))

## theme settings for maps
theme_map <- 
  theme_void(base_family = "Montserrat") +
  theme(legend.direction = "horizontal",
        legend.box = "horizontal",
        legend.margin = margin(10, 10, 10, 10),
        legend.title = element_text(size = 17, 
                                    face = "bold"),
        legend.text = element_text(color = "grey33",
                                   size = 12),
        plot.margin = margin(15, 5, 15, 5),
        plot.title = element_text(face = "bold",
                                  size = 20,
                                  hjust = .5,
                                  margin = margin(30, 0, 10, 0)),
        plot.subtitle = element_text(face = "bold",
                                     color = "grey33",
                                     size = 17,
                                     hjust = .5,
                                     margin = margin(10, 0, -30, 0)),
        plot.caption = element_text(size = 14,
                                    color = "grey33",
                                    hjust = .97,
                                    margin = margin(-30, 0, 0, 0)))

## numeric format for labels
num_format <- scales::format_format(big.mark = ",", small.mark = ",", scientific = F)

## main color backlinko
bl_col <- "#00d188"
bl_dark <- darken(bl_col, .3, space = "HLS")

## colors + labels for interval stripes
int_cols <- c("#bce2d5", "#79d8b6", bl_col, "#009f66", "#006c45", "#003925")
int_perc <- c("100%", "95%", "75%", "50%", "25%", "5%")

## colors for degrees (Bachelors, Massters, Doctorate in reverse order)
cols_degree <- c("#e64500", "#FFCC00", darken(bl_col, .1))

## gradient colors for position
colfunc <- colorRampPalette(c(bl_col, "#bce2d5"))
pos_cols <- colfunc(10)

load data

samples <- rio::import(here::here("proc_data", "samples_10_10.rds")) %>%
  mutate(
    geodist_miles = round(geodist * 0.62137119224, 2), # the distance in American miles instead of km
    drop = -drop
  )

initial data exploration

Average drop by city:

# just a test with Atlanta:
# atlanta <- samples %>% 
#   filter(City == "Atlanta")
# atlanta_avg <- atlanta %>%
#   group_by(geodist) %>%
#   summarise(avg_drop = mean(drop),
#             med_drop = median(drop))
# ggplot(data = atlanta) +
#   geom_point(mapping = aes(x = geodist, y = drop)) + 
#   geom_smooth(mapping = aes(x = geodist, y = drop), method = "lm", formula = y ~ log(x+0.00000000000001)) +
#   geom_point(data = atlanta_avg, mapping = aes(x = geodist, y = avg_drop), color = "red")

# library(zoo)
# 
# samples_avg <- samples %>%
#   mutate(
#     test = case_when(
#       geodist_miles == 0 ~ 0,
#       (0 < geodist_miles) & (geodist_miles <= 1) ~ 1,
#       (1 < geodist_miles) & (geodist_miles <= 2) ~ 2,
#       (2 < geodist_miles) & (geodist_miles <= 3) ~ 3,
#       (3 < geodist_miles) & (geodist_miles <= 4) ~ 4,
#       (4 < geodist_miles) & (geodist_miles <= 5) ~ 5,
#       (5 < geodist_miles) & (geodist_miles <= 6) ~ 6,
#       (6 < geodist_miles) & (geodist_miles <= 7) ~ 7,
#       geodist_miles > 7 ~ 8,
#       TRUE ~ 100 # geodist_miles
#     ),
#     rough_geodist = ifelse(geodist_miles == 0, 0, cut(geodist_miles, 8))
#   ) %>%
#   group_by(City, rough_geodist) %>%
#   summarise(avg_drop = mean(drop),
#             med_drop = median(drop),
#             n_samples = n()) %>%
#   ungroup()
# 
# 
# 
# # compute rolling average of the drop:
# samples <- samples %>% 
#   mutate(
#     drop_1mile = zoo::rollmean(drop, k = 1, fill = NA, align = "left"),
#     drop_2mile = zoo::rollmean(drop, k = 2, fill = NA, align = "left"),
#     drop_3mile = zoo::rollmean(drop, k = 100, fill = NA, align = "left")
#   )
# 
# ggplot(data = samples) +
#   geom_point(mapping = aes(x = geodist_miles, y = drop), alpha = 0.1) + 
#   facet_wrap(vars(City), ncol = 5) + 
#   scale_y_reverse() + 
#   geom_point(data = samples_avg, mapping = aes(x = rough_geodist, y = avg_drop), color = "red", alpha = 0.5) +
#   geom_line(data = samples_avg, mapping = aes(x = rough_geodist, y = avg_drop), color = "red", alpha = 0.5) +
#   # geom_ma(data = samples, mapping = aes(x = geodist_miles, y = drop), ma_fun = SMA, n = 2, color = "blue") +
#   # geom_line(data = samples, mapping = aes(x = geodist_miles, y = drop_3mile), color = "blue") +
#   # geom_smooth(data = samples, mapping = aes(x = geodist_miles, y = drop), method = "loess")
#   labs(
#     title = "The average drop in position",
#     subtitle = "It varies a lot between cities and law firms"
#   ) +
#   xlab("Distance in Miles to the location of the law firm") +
#   ylab("Drop in the ranking")
# 
# # geom_smooth(mapping = aes(x = geodist, y = drop), method = "loess") + # not super useful
# # geom_point(data = samples_avg, mapping = aes(x = geodist, y = med_drop), color = "blue") + # too confusing to keep, and not particularly different to the mean.


samples_avg <- samples %>%
  mutate(
    rough_geodist = cut(geodist_miles, breaks = seq(0, max(samples$geodist_miles)+1), labels = FALSE), # bin by mile 
    rough_geodist = ifelse(is.na(rough_geodist), 0, rough_geodist) # the bin 0 is 0 instead of NA
  ) %>%
  group_by(City, rough_geodist) %>%
  summarise(avg_drop = mean(drop),
            n_samples = n(),
            sd = sd(drop),
            avg_rank = mean(rank)
            ) %>%
  ungroup()


average_drop <- ggplot(data = samples_avg, mapping = aes(x = rough_geodist, y = avg_drop, group = 1)) +
  geom_point() +
  geom_line() +
  # geom_errorbar(aes(ymin = avg_drop - sd, ymax = avg_drop + sd)) + 
  facet_wrap(vars(City), ncol = 5) + 
  scale_y_continuous(breaks = seq(0,-15) ) + 
  scale_x_continuous(breaks = seq(0,max(samples_avg$rough_geodist)), minor_breaks = NULL) + 
  labs(
    title = "The average drop in position",
    subtitle = "The ranking drops dramatically in the first mile and quickly stabilizes. Distance are binned by mile."
  ) +
  xlab("Distance in miles to the location of the law firm") +
  ylab("Drop in the ranking (indexed at 0)") 

average_drop

if(save == T){ 
  ggsave(here::here("plots", "average_drop_by_city-10.pdf"), average_drop, 
         width = 12.5, height = 8, device = cairo_pdf)
}

Let’s have a look at all the samples in each city:

samples_avg_all <- samples %>%
  mutate(
    rough_geodist = cut(geodist_miles, breaks = seq(0, max(samples$geodist_miles)+1), labels = FALSE), # bin by mile 
    rough_geodist = ifelse(is.na(rough_geodist), 0, rough_geodist) # the bin 0 is 0 instead of NA
  ) %>%
  group_by(City, Sample, rough_geodist) %>%
  summarise(avg_drop = mean(drop),
            avg_rank = mean(rank),
            n_samples = n()) %>%
  ungroup()

average_drop_all_samples <- ggplot(data = samples_avg_all, mapping = aes(x = rough_geodist, y = avg_drop, group = Sample)) +
  # geom_point(alpha = 0.2) +
  geom_line(alpha = 0.4) +
  geom_line(data = samples_avg, mapping = aes(x = rough_geodist, y = avg_drop, group = 1), size = 1.2) +
  facet_wrap(vars(City), ncol = 5) + 
  # scale_y_continuous(breaks = seq(0,-15) ) + 
  scale_x_continuous(breaks = seq(0,max(samples_avg_all$rough_geodist)), minor_breaks = NULL) + 
  labs(
    title = "The drop in position for each law firm",
    subtitle = "The magnitude of the drop varies greatly between law firms; some see almost no drop. \nEach grey line is a sample (a law firm), and their average is in black."
  ) +
  xlab("Distance in miles to the location of the law firm") +
  ylab("Drop in the ranking (indexed at 0)")

average_drop_all_samples

if(save == T){ 
  ggsave(here::here("plots", "average_drop_all_samples-10.pdf"), average_drop_all_samples, 
         width = 12.5, height = 8, device = cairo_pdf)
}

Let’s have a look at their original rank. By doing so, we might be able to differentiate the fate of top-ranking law firms from others.

average_rank_all_samples <- ggplot(data = samples_avg_all, 
                                   mapping = aes(x = rough_geodist, y = avg_rank, group = Sample)) +
  geom_line(alpha = 0.4) +
  geom_line(data = samples_avg, mapping = aes(x = rough_geodist, y = avg_rank, group = 1), size = 1.2) +
  facet_wrap(vars(City), ncol = 5) + 
  scale_y_reverse() +
  scale_x_continuous(breaks = seq(0,max(samples_avg_all$rough_geodist)), minor_breaks = NULL) + 
  labs(
    title = "The rank by distance of each law firm",
    subtitle = "The ranking of the drop varies greatly between law firms; The highest ranking firms drop much less. \nEach grey line is a sample (a law firm), and their average is in black. A rank of 21 means '+20': out of the top 20."
  ) +
  xlab("Distance in miles to the location of the law firm") +
  ylab("Ranking (average, binned by mile)") 

average_rank_all_samples

if(save == T){ 
  ggsave(here::here("plots", "average_rank_all_samples-10.pdf"), average_rank_all_samples, 
         width = 12.5, height = 8, device = cairo_pdf)
}

Conlusion of the first data exploration

We sampled 10 law firms in the 10 largest cities. We found them by searching for ‘car accident lawyers’. For each, we measured their ranking for the keyword “car accident lawyer” at 225 location disposed on a grid of 5 miles radius (100 squared miles) around the original location of the firm.

Key observations:
1. The drop in ranking varies greatly between law firms. Some top-ranking firms do not even see a drop in the 5 miles. Some see a quick drop out of the top 20.
2. When there is a drop, much of the drop occurs in the first mile.
3. Only ~20% of the law firms droped of the top 20 after 5 miles around their location.